#### "How do past repression and indoctrination affect redistributive preferences?" ####
# authors: "Pelke, Lars"
# date: 2019-10-23
# written under "R version 3.6.0 (2019-03-11)"

#### Preliminaries ####

# Show the R version string of the current session (compare with the
# version noted in the file header).
R.version$version.string

# clear workspace
# NOTE(review): this removes every object from the environment the script is
# run in; attached packages and options are left untouched. Running the
# script in a fresh R session would make this line unnecessary.
rm(list=ls())

# set working directory

# loading packages

library(countrycode)
library(tidyverse)
library(viridis)
library(scales)
library(readstata13)

#### Import Data ####

# Load the raw Latinobarometer survey waves (Stata files); each wave is read
# into its own data frame named lb<year>.

lb1997 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_1997_datos_english_v2014_06_27.dta"
)
lb2001 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2001_datos_english_v2014_06_27.dta"
)
lb2002 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2002_datos_eng_v2014_06_27.dta"
)
lb2007 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2007_datos_eng_v2014_06_27.dta"
)
lb2009 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2009_datos_eng_v2014_06_27.dta"
)
lb2010 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2010_datos_eng_v2014_06_27.dta"
)
lb2011 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2011_eng.dta"
)
lb2013 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro2013Eng.dta"
)
lb2015 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2015_Eng.dta"
)
lb2016 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro2016Eng_v20170205.dta"
)
lb2017 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro2017Eng_v20180117.dta"
)
lb2018 <- readstata13::read.dta13(
  "data/Latinobarometer/Latinobarometro_2018_Eng_Stata_v20190303.dta"
)

#### Reduce and rename Datasets ####

# LB 1997: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb1997 <- lb1997 %>%
  select(
    country_code = idenpa,
    sex = s1,
    age = s2,
    education = s10,
    employment_status = s7a,
    class = s16,
    income_equality = nsp20
  ) %>%
  mutate(year = 1997, wave = "LB 1997")

summary(lb1997)

# LB 2001: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb2001 <- lb2001 %>%
  select(
    country_code = idenpa,
    sex = s1,
    age = s2,
    education = s6,
    employment_status = s8a,
    class = s16a,
    income_equality = p11st
  ) %>%
  mutate(year = 2001, wave = "LB 2001")
summary(lb2001)

# LB 2002: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb2002 <- lb2002 %>%
  select(
    country_code = idenpa,
    sex = s1,
    age = s2,
    education = s6,
    employment_status = s8a,
    class = s20,
    income_equality = p16st
  ) %>%
  mutate(year = 2002, wave = "LB 2002")
summary(lb2002)

# LB 2007: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb2007 <- lb2007 %>%
  select(
    country_code = idenpa,
    sex = s10,
    age = s11,
    education = s15,
    employment_status = s17a,
    class = s28,
    income_equality = p17st
  ) %>%
  mutate(year = 2007, wave = "LB 2007")
summary(lb2007)

# LB 2010: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
# (Variable names in this wave's file are upper case, except idenpa.)
lb2010 <- lb2010 %>%
  select(
    country_code = idenpa,
    sex = S7,
    age = S8,
    education = S14,
    employment_status = S16A,
    class = S28,
    income_equality = P12ST
  ) %>%
  mutate(year = 2010, wave = "LB 2010")
summary(lb2010)

# LB 2009: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb2009 <- lb2009 %>%
  select(
    country_code = idenpa,
    sex = s5,
    age = s6,
    education = s12,
    employment_status = s14a,
    class = s26,
    income_equality = p14st
  ) %>%
  mutate(year = 2009, wave = "LB 2009")
summary(lb2009)

# LB 2011: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb2011 <- lb2011 %>%
  select(
    country_code = idenpa,
    sex = S16,
    age = S17,
    education = S21,
    employment_status = S23A,
    class = S34,
    income_equality = P12ST
  ) %>%
  mutate(year = 2011, wave = "LB 2011")
summary(lb2011)

# LB 2013: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
lb2013 <- lb2013 %>%
  select(
    country_code = idenpa,
    sex = S10,
    age = S11,
    education = S17,
    employment_status = S19_A,
    class = S27,
    income_equality = P27ST
  ) %>%
  mutate(year = 2013, wave = "LB 2013")
summary(lb2013)


# LB 2015: rename the wave-specific variables to the common names, keep only
# those seven variables, and tag each row with survey year and wave.
lb2015 <- lb2015 %>%
  rename(country_code = idenpa,
         sex = S12,
         age = S13,
         education = S19,
         employment_status = S21_A,
         class = S29,
         income_equality = P18ST) %>%
  select(country_code, sex, age, education, employment_status, class, income_equality) %>%
  # The wave label must match the survey year of this file (2015); with
  # "LB 2016" the 2015 and 2016 rows could not be told apart by `wave`.
  mutate(year = 2015,
         wave = "LB 2015")
summary(lb2015)

# LB 2016: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
# (Sex and age are stored as sexo / edad in this wave's file.)
lb2016 <- lb2016 %>%
  select(
    country_code = idenpa,
    sex = sexo,
    age = edad,
    education = S13,
    employment_status = S18A,
    class = S22,
    income_equality = P21ST
  ) %>%
  mutate(year = 2016, wave = "LB 2016")
summary(lb2016)

# LB 2017: rename the wave-specific variables to the common names, keep only
# those seven variables, and tag each row with survey year and wave.
lb2017 <- lb2017 %>%
  rename(country_code = idenpa,
         sex = sexo,
         age = edad,
         education = S14,
         employment_status = S18_A,
         class = S22,
         income_equality = P20ST) %>%
  select(country_code, sex, age, education, employment_status, class, income_equality) %>%
  # year has to be the survey year of this file (2017): birth_year is later
  # computed as year - age, so a wrong year shifts every birth cohort.
  mutate(year = 2017,
         wave = "LB 2017")
summary(lb2017)


# LB 2018: keep only the seven variables used later, renaming them to the
# common names while selecting, then tag each row with survey year and wave.
# (All variable names in this wave's file are upper case.)
lb2018 <- lb2018 %>%
  select(
    country_code = IDENPA,
    sex = SEXO,
    age = EDAD,
    education = S10,
    employment_status = S14A,
    class = S26,
    income_equality = P23ST
  ) %>%
  mutate(year = 2018, wave = "LB 2018")
summary(lb2018)


# Stack the twelve reduced waves into one long data frame; columns are
# matched by name.
latinobarometer <- bind_rows(
  lb1997, lb2001, lb2002, lb2007, lb2009, lb2010,
  lb2011, lb2013, lb2015, lb2016, lb2017, lb2018
)

summary(latinobarometer)

#### Prepare Latinobarometer Variables for Empirical Analysis ####
summary(latinobarometer$sex)

# Recode sex into a dummy: 1 = female, 0 = male. Two label variants are
# handled ("Female"/"Male" and "Woman"/"Man"); any other value, including
# NA, matches no branch and becomes NA.
latinobarometer <- latinobarometer %>%
  mutate(sex = case_when(sex %in% c("Female", "Woman") ~ 1,
                         sex %in% c("Male", "Man") ~ 0))

# Check the resulting distribution and the number of missing values.
table(latinobarometer$sex)
sum(is.na(latinobarometer$sex))

summary(latinobarometer$age)

# Keep only non-negative ages; negative values (presumably missing-value
# codes -- TODO confirm against the codebooks) are set to NA.
latinobarometer <- latinobarometer %>%
  mutate(age = ifelse(age >= 0, age, NA))

summary(latinobarometer$age)
sum(is.na(latinobarometer$age))

## Education ##

summary(latinobarometer$education)
table(latinobarometer$education)

# Collapse the education labels into three levels:
#   1 = without education or 1-6 years of schooling
#   2 = 7-12 years of schooling, or (in)complete high school / academies /
#       technical training
#   3 = (in)complete university
# Every other label (e.g. "Not asked", "No answer/Refused") and NA match no
# branch and therefore become NA, so no separate missing-value step is
# needed (after the recode the column is numeric and string comparisons
# could never match anyway).
latinobarometer <- latinobarometer %>%
  mutate(education = case_when(
    education %in% c("Without education",
                     "1 year", "2 years", "3 years",
                     "4 years", "5 years", "6 years") ~ 1,
    # "9 years" belongs to this group as well; leaving it out would turn
    # those respondents into NA.
    education %in% c("7 years", "8 years", "9 years",
                     "10 years", "11 years", "12 years",
                     "High school/academies/Complete technical training",
                     "High school/academies/Incomplete technical training") ~ 2,
    education %in% c("Completed university",
                     "Incomplete university") ~ 3
  ))
           
## Birth Year ##

# Approximate year of birth: survey year minus reported age (NA when age is
# missing).
latinobarometer$birth_year <- latinobarometer$year - latinobarometer$age

summary(latinobarometer$birth_year)
sum(is.na(latinobarometer$birth_year))

## Social Class ##

table(latinobarometer$class)

# Map the answer labels of `class` onto a 1-5 scale in a new column
# social_class. "Not asked", "No answer/Refused", NA and any label not
# listed below yield NA.
latinobarometer <- latinobarometer %>%
  mutate(social_class = case_when(
    is.na(class) | class %in% c("Not asked", "No answer/Refused") ~ NA_real_,
    class == "Very bad" ~ 1,
    class == "Bad" ~ 2,
    class %in% c("Not bad", "Average", "Regular") ~ 3,
    class == "Good" ~ 4,
    class == "Very good" ~ 5
  ))

table(latinobarometer$social_class)

## Employment Status ##
table(latinobarometer$employment_status)

# Unemployment dummy: 1 for the two "temporarily without work" labels, NA
# for missing / "Not asked" / "No answer/Refused", 0 for every other status.
latinobarometer <- latinobarometer %>%
  mutate(unemployed = case_when(
    employment_status %in% c("Temporarily out of work",
                             "Temporarily doesnot work") ~ 1,
    is.na(employment_status) |
      employment_status %in% c("Not asked", "No answer/Refused") ~ NA_real_,
    TRUE ~ 0
  ))

table(latinobarometer$unemployed)

summary(latinobarometer$unemployed)
sum(is.na(latinobarometer$unemployed))

#### Mutating Dependent Variables ####

table(latinobarometer$income_equality)

# Recode the fairness-of-income-distribution item to a 1-4 scale
# (1 = very fair ... 4 = very unfair); both capitalisation variants of the
# labels are handled. Every other label (the "Not asked", "No answer" and
# "Don't know" variants) and NA match no branch and become NA. A separate
# missing-value step after the recode would be a no-op, because the column
# is numeric by then and can never equal one of those strings.
latinobarometer <- latinobarometer %>%
  mutate(income_equality = case_when(
    income_equality %in% c("Very fair", "Very Fair") ~ 1,
    income_equality == "Fair" ~ 2,
    income_equality == "Unfair" ~ 3,
    income_equality %in% c("Very Unfair", "Very unfair") ~ 4
  ))

summary(latinobarometer$income_equality)

#### Convert Latinobarometer country labels to ISO numeric country codes ####

# Match the country labels as country names and return the "iso3n" code.
latinobarometer$iso3n <- countrycode(
  sourcevar = latinobarometer$country_code,
  origin = "country.name",
  destination = "iso3n",
  warn = TRUE
)

# Set the code by hand for two labels that carry a "[%<code>%]" prefix.
latinobarometer <- latinobarometer %>%
  mutate(
    iso3n = replace(iso3n, which(country_code == "[%214%] Rep. Dominicana"), 214),
    iso3n = replace(iso3n, which(country_code == "[%76%] Brasil"), 76)
  )

#### Age and Cohorts Latinobarometer ####

## cohort of respondents ##

# The cohort is simply the respondent's birth year.
latinobarometer <- latinobarometer %>%
  mutate(cohort = birth_year)
summary(latinobarometer$cohort)


# Five-year birth cohorts: left-closed bins [1885, 1890), ..., [2015, 2020),
# each labelled by its first year and stored as a number. Birth years
# outside 1885-2019 become NA. The two match variables add 15 and 20 years
# of socialization to the cohort's first year.
latinobarometer <- latinobarometer %>%
  mutate(
    cohort_5 = cut(cohort,
                   breaks = seq(1885, 2020, by = 5),
                   labels = seq(1885, 2015, by = 5),
                   right = FALSE),
    cohort_5 = as.numeric(as.character(cohort_5)),
    cohortmatch5_15 = cohort_5 + 15,
    cohortmatch5_20 = cohort_5 + 20
  )

table(latinobarometer$cohort_5)
table(latinobarometer$cohortmatch5_15)
table(latinobarometer$cohortmatch5_20)

#### Generate Dataset ID ####

# Constant source identifier on every row.
latinobarometer$data <- "Latinobarometer"
table(latinobarometer$data)

#################################################################################################################
#################################################################################################################

#### RECODING OF VARIABLES FOR HARMONIZATION OF DATASETS ####

## Rescale Dependent Variables ##

# income_equality: linearly map the observed range onto 0-100.

table(latinobarometer$income_equality)

latinobarometer <- latinobarometer %>%
  mutate(income_equality = scales::rescale(income_equality, to = c(0, 100)))
table(latinobarometer$income_equality)


# education_3 is a plain copy of the recoded education variable.
latinobarometer <- latinobarometer %>%
  mutate(education_3 = education)

#### SAVE DATASET ####

# Keep only the prepared analysis variables, in this column order, and write
# the result to disk as an RDS file.
latinobarometer <- latinobarometer %>%
  dplyr::select(
    data, iso3n, year, wave,
    sex, age, education, education_3, birth_year,
    social_class, unemployed,
    income_equality,
    cohort_5, cohortmatch5_15, cohortmatch5_20
  )

saveRDS(object = latinobarometer, file = "data/latinobarometer_prepared.rds")





